Libraries & Dependencies¶

In [1]:
import pandas as pd
pd.options.display.float_format = '{:,.4f}'.format

import numpy as np

from sklearn.metrics import r2_score, mean_absolute_error,mean_absolute_percentage_error
from scipy import optimize

import plotly.graph_objects as go

import warnings
warnings.filterwarnings('ignore')

Credit¶

https://github.com/emilyripka/BlogRepo/blob/master/181113_CurveFitting.ipynb

image.png

Data Prep¶

In [2]:
# Average LTV per player at each milestone day, calculated as the cohort's
# total revenue divided by its total installs.
d1_ltv, d3_ltv, d7_ltv = 0.082, 0.098, 0.117
d14_ltv, d30_ltv = 0.137, 0.17
d60_ltv, d90_ltv = 0.19, 0.21
In [3]:
# Linearly interpolate between consecutive milestone LTVs to get one value per
# day. Every segment after the first drops its leading point, because that
# point is the final point of the previous segment.
one_three = list(np.linspace(d1_ltv, d3_ltv, num=3))
three_seven = list(np.linspace(d3_ltv, d7_ltv, num=5)[1:])
seven_fourteen = list(np.linspace(d7_ltv, d14_ltv, num=8)[1:])
fourteen_thirty = list(np.linspace(d14_ltv, d30_ltv, num=17)[1:])
thirty_sixty = list(np.linspace(d30_ltv, d60_ltv, num=31)[1:])
sixty_ninety = list(np.linspace(d60_ltv, d90_ltv, num=31)[1:])
In [4]:
# Flatten the six interpolated segments, in day order, into a single list of
# daily LTV values rounded to 4 decimal places.
ltvs = [
    round(value, 4)
    for segment in (one_three, three_seven, seven_fourteen,
                    fourteen_thirty, thirty_sixty, sixty_ninety)
    for value in segment
]
In [5]:
# Daily LTV table: one row per day (1-90) with that day's LTV value.
df = pd.DataFrame({
    'Day': list(range(1, 91)),
    'LTV': [
        # days 1-3
        0.082, 0.09, 0.098,
        # days 4-7
        0.1028, 0.1075, 0.1122, 0.117,
        # days 8-14
        0.1199, 0.1227, 0.1256, 0.1284, 0.1313, 0.1341, 0.137,
        # days 15-30
        0.1391, 0.1411, 0.1432, 0.1453, 0.1473, 0.1494, 0.1514, 0.1535,
        0.1556, 0.1576, 0.1597, 0.1618, 0.1638, 0.1659, 0.1679, 0.17,
        # days 31-60
        0.1707, 0.1713, 0.172, 0.1727, 0.1733, 0.174, 0.1747, 0.1753, 0.176, 0.1767,
        0.1773, 0.178, 0.1787, 0.1793, 0.18, 0.1807, 0.1813, 0.182, 0.1827, 0.1833,
        0.184, 0.1847, 0.1853, 0.186, 0.1867, 0.1873, 0.188, 0.1887, 0.1893, 0.19,
        # days 61-90
        0.1907, 0.1913, 0.192, 0.1927, 0.1933, 0.194, 0.1947, 0.1953, 0.196, 0.1967,
        0.1973, 0.198, 0.1987, 0.1993, 0.2, 0.2007, 0.2013, 0.202, 0.2027, 0.2033,
        0.204, 0.2047, 0.2053, 0.206, 0.2067, 0.2073, 0.208, 0.2087, 0.2093, 0.21,
    ],
})

df
Out[5]:
Day LTV
0 1 0.0820
1 2 0.0900
2 3 0.0980
3 4 0.1028
4 5 0.1075
... ... ...
85 86 0.2073
86 87 0.2080
87 88 0.2087
88 89 0.2093
89 90 0.2100

90 rows × 2 columns

Visualize the data¶

In [6]:
# Scatter plot of the observed daily LTV values, before any curve is fitted.
fig = go.Figure()

fig.add_trace(go.Scatter(x=df.Day, y=df.LTV,
                         mode='markers',
                         name='Raw Data'))

# Transparent backgrounds, centred bold title (the <b> tag is now closed
# properly) and explicit axis labels so the figure can be read on its own.
fig.update_layout(
    plot_bgcolor='rgba(0, 0, 0, 0)',
    paper_bgcolor='rgba(0, 0, 0, 0)',
    title=dict(text='<b>LTV curves</b>', x=0.5, y=0.95),
    xaxis_title='Day',
    yaxis_title='LTV',
)

Functions¶

In [7]:
def linear_function(x, a, b):
    """Evaluate the straight line ``a * x + b``.

    Parameters
    ----------
    x : scalar or array-like
        Input value(s). Built-in sequences (``list``, ``tuple``, ``range``)
        are converted to a NumPy array so the arithmetic is element-wise;
        any other input is used as given.
    a : float
        Slope.
    b : float
        Intercept.

    Returns
    -------
    The value(s) of ``a * x + b``, element-wise for array-like ``x``.
    """
    # Built-in sequences do not do element-wise arithmetic with Python numbers
    # (``2 * [1, 2]`` repeats the list and ``0.5 * range(3)`` raises TypeError),
    # so convert them to an ndarray before computing.
    if isinstance(x, (list, tuple, range)):
        x = np.asarray(x)
    return a * x + b

def logarithmic_function(x, a, b, c):
    """Evaluate the logarithmic curve ``a * ln(b * x) + c``.

    Parameters
    ----------
    x : scalar or array-like
        Input value(s). Built-in sequences (``list``, ``tuple``, ``range``)
        are converted to a NumPy array so the arithmetic is element-wise;
        any other input is used as given.
    a : float
        Multiplier applied to the logarithm.
    b : float
        Multiplier applied to ``x`` inside the logarithm.
    c : float
        Constant offset.

    Returns
    -------
    The value(s) of ``a * ln(b * x) + c``, element-wise for array-like ``x``.

    Notes
    -----
    ``a * ln(b * x) + c`` equals ``a * ln(x) + (a * ln(b) + c)`` whenever
    ``b * x > 0``, so ``b`` and ``c`` both only shift the curve vertically.
    """
    # Built-in sequences do not do element-wise arithmetic with Python numbers
    # (``3 * [1, 2]`` repeats the list and ``0.5 * range(3)`` raises TypeError),
    # so convert them to an ndarray before computing.
    if isinstance(x, (list, tuple, range)):
        x = np.asarray(x)
    return a * np.log(b * x) + c

def exponential_function(x, a, k, b):
    """Evaluate the exponential curve ``a * exp(-k * x) + b``.

    The exponent uses ``-k`` so that this definition agrees with the
    ``exponential_function`` defined in the Exponential section, which is the
    one passed to ``optimize.curve_fit``.

    Parameters
    ----------
    x : scalar or array-like
        Input value(s). Built-in sequences (``list``, ``tuple``, ``range``)
        are converted to a NumPy array so the arithmetic is element-wise;
        any other input is used as given.
    a : float
        Multiplier applied to the exponential term.
    k : float
        Rate; for ``k > 0`` the exponential term shrinks as ``x`` grows, so
        the curve approaches ``b``.
    b : float
        Constant offset.

    Returns
    -------
    The value(s) of ``a * exp(-k * x) + b``, element-wise for array-like ``x``.
    """
    # Built-in sequences do not do element-wise arithmetic with Python numbers
    # (``[0, 1] * -1`` yields an empty list and ``range(3) * -0.5`` raises
    # TypeError), so convert them to an ndarray before computing.
    if isinstance(x, (list, tuple, range)):
        x = np.asarray(x)
    return a * np.exp(x * -k) + b

Linear¶

In [8]:
# Use scipy's optimize.curve_fit to fit the x- and y-axis data.
# curve_fit returns:
#   popt_linear: the fitted parameters
#   pcov_linear: the estimated covariance of the fitted parameters

def linear_function(x, a, b):
    """Evaluate the straight line ``a * x + b``.

    Parameters
    ----------
    x : scalar or array-like
        Input value(s). Built-in sequences (``list``, ``tuple``, ``range``)
        are converted to a NumPy array so the arithmetic is element-wise;
        any other input is used as given.
    a : float
        Slope.
    b : float
        Intercept.

    Returns
    -------
    The value(s) of ``a * x + b``, element-wise for array-like ``x``.
    """
    # Built-in sequences do not do element-wise arithmetic with Python numbers
    # (``2 * [1, 2]`` repeats the list and ``0.5 * range(3)`` raises TypeError),
    # so convert them to an ndarray before computing.
    if isinstance(x, (list, tuple, range)):
        x = np.asarray(x)
    return a * x + b

# Fit the straight line to the observed (Day, LTV) pairs.
popt_linear, pcov_linear = optimize.curve_fit(
    f=linear_function,
    xdata=df.Day,
    ydata=df.LTV,
)
In [9]:
# Show the fitted (a, b) parameters together with their covariance matrix.
popt_linear, pcov_linear
Out[9]:
(array([0.00110147, 0.12211755]),
 array([[ 1.89963722e-09, -8.64334941e-08],
        [-8.64334941e-08,  5.21482085e-06]]))
In [10]:
# Evaluate the fitted line at x = 1002.
# NOTE(review): the fit only uses days 1-90, so this is a long extrapolation —
# confirm that 1002 is the intended day.
linear_function(1002, *popt_linear)
Out[10]:
1.2257906315010743
In [11]:
# Fitted linear values for days 1-90, the same days that are present in df.
linear_function(range(1,91), *popt_linear)
Out[11]:
array([0.12321902, 0.12432049, 0.12542196, 0.12652343, 0.1276249 ,
       0.12872637, 0.12982784, 0.13092931, 0.13203078, 0.13313225,
       0.13423372, 0.13533519, 0.13643666, 0.13753813, 0.13863961,
       0.13974108, 0.14084255, 0.14194402, 0.14304549, 0.14414696,
       0.14524843, 0.1463499 , 0.14745137, 0.14855284, 0.14965431,
       0.15075578, 0.15185725, 0.15295872, 0.15406019, 0.15516166,
       0.15626313, 0.1573646 , 0.15846607, 0.15956754, 0.16066901,
       0.16177048, 0.16287195, 0.16397342, 0.16507489, 0.16617636,
       0.16727783, 0.1683793 , 0.16948077, 0.17058224, 0.17168371,
       0.17278518, 0.17388665, 0.17498812, 0.17608959, 0.17719106,
       0.17829253, 0.179394  , 0.18049547, 0.18159694, 0.18269841,
       0.18379988, 0.18490135, 0.18600282, 0.18710429, 0.18820576,
       0.18930723, 0.1904087 , 0.19151017, 0.19261164, 0.19371311,
       0.19481458, 0.19591605, 0.19701752, 0.19811899, 0.19922046,
       0.20032193, 0.2014234 , 0.20252487, 0.20362634, 0.20472781,
       0.20582928, 0.20693075, 0.20803222, 0.20913369, 0.21023516,
       0.21133663, 0.2124381 , 0.21353957, 0.21464104, 0.21574251,
       0.21684398, 0.21794546, 0.21904693, 0.2201484 , 0.22124987])
In [12]:
# Goodness of fit of the linear curve on the observed days.
# The prediction is computed once from the days actually present in df.
linear_pred = linear_function(df.Day.to_numpy(), *popt_linear)

# Built-in round() is used instead of the .round() method: it works whether
# the scikit-learn metric returns a NumPy scalar or a plain Python float
# (plain floats have no .round() method).
linear_r2 = round(r2_score(df.LTV, linear_pred), 2)
linear_mae = round(mean_absolute_error(df.LTV, linear_pred), 3)
linear_mape = round(mean_absolute_percentage_error(df.LTV, linear_pred) * 100, 2)

print(f"R2:{linear_r2}\nMAE:{linear_mae}\nMAPE:{linear_mape}%")
R2:0.88
MAE:0.008
MAPE:5.61%
In [13]:
# Raw data against the fitted linear curve, extended to day 360.
days_1_to_360 = np.arange(1, 361)

fig = go.Figure()

fig.add_trace(go.Scatter(x=df.Day, y=df.LTV,
                         mode='markers',
                         name='Raw Data'))

fig.add_trace(go.Scatter(x=days_1_to_360,
                         y=linear_function(days_1_to_360, *popt_linear),
                         mode='lines',
                         name='Linear Curve'))

# Transparent backgrounds, centred bold title (the <b> tag is now closed
# properly) and explicit axis labels so the figure can be read on its own.
fig.update_layout(
    plot_bgcolor='rgba(0, 0, 0, 0)',
    paper_bgcolor='rgba(0, 0, 0, 0)',
    title=dict(text='<b>LTV curves</b>', x=0.5, y=0.95),
    xaxis_title='Day',
    yaxis_title='LTV',
)

Logarithmic¶

In [14]:
# Use scipy's optimize.curve_fit to fit the x- and y-axis data.
# curve_fit returns:
#   popt_log: the fitted parameters
#   pcov_log: the estimated covariance of the fitted parameters

def logarithmic_function(x, a, b, c):
    """Evaluate the logarithmic curve ``a * ln(b * x) + c``.

    Parameters
    ----------
    x : scalar or array-like
        Input value(s). Built-in sequences (``list``, ``tuple``, ``range``)
        are converted to a NumPy array so the arithmetic is element-wise;
        any other input is used as given.
    a : float
        Multiplier applied to the logarithm.
    b : float
        Multiplier applied to ``x`` inside the logarithm.
    c : float
        Constant offset.

    Returns
    -------
    The value(s) of ``a * ln(b * x) + c``, element-wise for array-like ``x``.

    Notes
    -----
    ``a * ln(b * x) + c`` equals ``a * ln(x) + (a * ln(b) + c)`` whenever
    ``b * x > 0``, so ``b`` and ``c`` both only shift the curve vertically.
    """
    # Built-in sequences do not do element-wise arithmetic with Python numbers
    # (``3 * [1, 2]`` repeats the list and ``0.5 * range(3)`` raises TypeError),
    # so convert them to an ndarray before computing.
    if isinstance(x, (list, tuple, range)):
        x = np.asarray(x)
    return a * np.log(b * x) + c

# Fit the logarithmic curve to the observed (Day, LTV) pairs.
# NOTE(review): a * ln(b * x) + c equals a * ln(x) + (a * ln(b) + c), so b and c
# are not separately identifiable from the data; only their combination is
# pinned down by the fit. Treat the individual b and c values with care.
popt_log, pcov_log = optimize.curve_fit(
    f=logarithmic_function,
    xdata=df.Day,
    ydata=df.LTV,
)

# Show the fitted (a, b, c) parameters.
popt_log
Out[14]:
array([0.03291741, 0.01373978, 0.19700252])
In [15]:
# Goodness of fit of the logarithmic curve on the observed days.
# The prediction is computed once from the days actually present in df.
log_pred = logarithmic_function(df.Day.to_numpy(), *popt_log)

# Built-in round() is used instead of the .round() method: it works whether
# the scikit-learn metric returns a NumPy scalar or a plain Python float
# (plain floats have no .round() method).
log_r2 = round(r2_score(df.LTV, log_pred), 2)
log_mae = round(mean_absolute_error(df.LTV, log_pred), 3)
log_mape = round(mean_absolute_percentage_error(df.LTV, log_pred) * 100, 2)

print(f"R2:{log_r2}\nMAE:{log_mae}\nMAPE:{log_mape}%")
R2:0.98
MAE:0.003
MAPE:1.99%
In [16]:
# Raw data against the fitted linear and logarithmic curves, extended to day 180.
# (plotly.graph_objects is already imported as `go` in the notebook's first cell,
# so it is not imported again here.)
days_1_to_180 = np.arange(1, 181)

fig = go.Figure()

fig.add_trace(go.Scatter(x=df.Day, y=df.LTV,
                         mode='markers',
                         name='Raw Data'))

fig.add_trace(go.Scatter(x=days_1_to_180,
                         y=linear_function(days_1_to_180, *popt_linear),
                         mode='lines',
                         name='Linear Curve'))

fig.add_trace(go.Scatter(x=days_1_to_180,
                         y=logarithmic_function(days_1_to_180, *popt_log),
                         mode='lines',
                         name='Log Curve'))

# Transparent backgrounds, centred bold title (the <b> tag is now closed
# properly) and explicit axis labels so the figure can be read on its own.
fig.update_layout(
    plot_bgcolor='rgba(0, 0, 0, 0)',
    paper_bgcolor='rgba(0, 0, 0, 0)',
    title=dict(text='<b>LTV curves</b>', x=0.5, y=0.95),
    xaxis_title='Day',
    yaxis_title='LTV',
)

Exponential¶

In [17]:
# Use scipy's optimize.curve_fit to fit the x- and y-axis data.
# curve_fit returns:
#   popt_exp: the fitted parameters
#   pcov_exp: the estimated covariance of the fitted parameters

def exponential_function(x, a, k, b):
    """Evaluate the exponential curve ``a * exp(-k * x) + b``.

    This definition replaces any ``exponential_function`` defined earlier in
    the notebook and is the one passed to ``optimize.curve_fit``.

    Parameters
    ----------
    x : scalar or array-like
        Input value(s). Built-in sequences (``list``, ``tuple``, ``range``)
        are converted to a NumPy array so the arithmetic is element-wise;
        any other input is used as given.
    a : float
        Multiplier applied to the exponential term.
    k : float
        Rate; for ``k > 0`` the exponential term shrinks as ``x`` grows, so
        the curve approaches ``b``.
    b : float
        Constant offset.

    Returns
    -------
    The value(s) of ``a * exp(-k * x) + b``, element-wise for array-like ``x``.
    """
    # Built-in sequences do not do element-wise arithmetic with Python numbers
    # (``[0, 1] * -1`` yields an empty list and ``range(3) * -0.5`` raises
    # TypeError), so convert them to an ndarray before computing.
    if isinstance(x, (list, tuple, range)):
        x = np.asarray(x)
    return a * np.exp(x * -k) + b


# Fit the exponential curve to the observed (Day, LTV) pairs.
popt_exp, pcov_exp = optimize.curve_fit(
    f=exponential_function,
    xdata=df.Day,
    ydata=df.LTV,
)

# Show the fitted (a, k, b) parameters.
popt_exp
Out[17]:
array([-0.1202182 ,  0.03411894,  0.2089351 ])
In [ ]:
# Goodness of fit of the exponential curve on the observed days.
# The prediction is computed once from the days actually present in df.
exp_pred = exponential_function(df.Day.to_numpy(), *popt_exp)

# Built-in round() is used instead of the .round() method: it works whether
# the scikit-learn metric returns a NumPy scalar or a plain Python float
# (plain floats have no .round() method).
exp_r2 = round(r2_score(df.LTV, exp_pred), 2)
exp_mae = round(mean_absolute_error(df.LTV, exp_pred), 3)
exp_mape = round(mean_absolute_percentage_error(df.LTV, exp_pred) * 100, 2)

print(f"R2:{exp_r2}\nMAE:{exp_mae}\nMAPE:{exp_mape}%")
In [18]:
# Raw data against all three fitted curves, extended to day 180.
# (plotly.graph_objects is already imported as `go` in the notebook's first cell,
# so it is not imported again here.)
days_1_to_180 = np.arange(1, 181)

# (legend label, model function, fitted parameters) for each curve, in plot order.
fitted_curves = [
    ('Linear Curve', linear_function, popt_linear),
    ('Log Curve', logarithmic_function, popt_log),
    ('Exp Curve', exponential_function, popt_exp),
]

fig = go.Figure()

fig.add_trace(go.Scatter(x=df.Day, y=df.LTV,
                         mode='markers',
                         name='Raw Data'))

for curve_label, curve_fn, curve_params in fitted_curves:
    fig.add_trace(go.Scatter(x=days_1_to_180,
                             y=curve_fn(days_1_to_180, *curve_params),
                             mode='lines',
                             name=curve_label))

# Transparent backgrounds, centred bold title (the <b> tag is now closed
# properly), explicit axis labels, and a horizontal legend centred just above
# the plotting area.
fig.update_layout(
    plot_bgcolor='rgba(0, 0, 0, 0)',
    paper_bgcolor='rgba(0, 0, 0, 0)',
    title=dict(text='<b>LTV curves</b>', x=0.5, y=0.95),
    xaxis_title='Day',
    yaxis_title='LTV',
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="center",
        x=0.5,
    ),
)
In [19]:
# Side-by-side goodness of fit of the three curves on the observed days.
observed_days = df.Day.to_numpy()

# (table label, model function, fitted parameters) for each curve, in row order.
fitted_models = [
    ('Linear', linear_function, popt_linear),
    ('Log', logarithmic_function, popt_log),
    ('Exp', exponential_function, popt_exp),
]

summary_rows = []
for curve_label, curve_fn, curve_params in fitted_models:
    # Predict once per curve and reuse the prediction for every metric.
    predicted = curve_fn(observed_days, *curve_params)
    # Built-in round() is used instead of the .round() method: it works whether
    # the scikit-learn metric returns a NumPy scalar or a plain Python float
    # (plain floats have no .round() method).
    summary_rows.append({
        "Curve": curve_label,
        "R2": round(r2_score(df.LTV, predicted), 4),
        "MAE": round(mean_absolute_error(df.LTV, predicted), 4),
        "MAPE": round(mean_absolute_percentage_error(df.LTV, predicted) * 100, 2),
    })

pd.DataFrame(summary_rows)
Out[19]:
Curve R2 MAE MAPE
0 Linear 0.8789 0.0082 5.6100
1 Log 0.9799 0.0028 1.9900
2 Exp 0.9897 0.0026 1.6200
In [ ]: